# -*- encoding: utf-8 -*-
"""
@File    : data_load.py
@Author  : lilong
@Time    : 2022/5/3 3:56 PM
"""

import os
import json

# Directory that is walked recursively for the raw input files.
path_dir = '/Users/longli/Desktop/SingTaskSu/data/auto_title/data'
# Destination file that receives the merged records as JSON.
data_json = '/Users/longli/Desktop/SingTaskSu/data/auto_title/data_long.json'

# Module-level accumulators (three distinct lists). Within this file only
# ``sum_lines`` is filled in; ``titles`` and ``texts`` are not used by the
# code visible here.
titles, texts, sum_lines = [], [], []


def load_data(path, records=None):
    """Parse one comma-separated text file into title/content records.

    The file is decoded as GBK (undecodable bytes are silently dropped), its
    first line is skipped as a header, and every remaining non-blank line is
    split on ``,``. The first field becomes the title and the last field
    becomes the content, both stripped of surrounding whitespace.

    Args:
        path: Path of the file to read.
        records: List the parsed records are appended to. Defaults to the
            module-level ``sum_lines`` list.

    Returns:
        The list the records were appended to.
    """
    if records is None:
        records = sum_lines
    with open(path, 'r', encoding='gbk', errors='ignore') as f:
        # Drop the header line; the default keeps an empty file from raising.
        next(f, None)
        # Stream the file instead of materialising every line up front.
        for line in f:
            # Blank lines carry no data and would otherwise yield records
            # with an empty title and an empty content.
            if not line.strip():
                continue
            fields = line.replace('\n', '').split(',')
            # NOTE(review): both keys end with a colon, which looks like a
            # typo; kept unchanged so the output schema stays compatible.
            records.append({
                'title:': fields[0].strip(),
                'content:': fields[-1].strip(),
            })
    return records


# Walk the data directory and feed every data file to load_data().
for root, dirs, files in os.walk(path_dir):
    # Sorting in place fixes the traversal order, so the record order in the
    # output JSON is reproducible (directory listings come in arbitrary order).
    dirs.sort()
    for name in sorted(files):
        # Skip hidden files such as macOS ".DS_Store": they are not data files
        # and would otherwise be decoded as text and produce junk records.
        if name.startswith('.'):
            continue
        file_path = os.path.join(root, name)
        print(file_path)
        load_data(file_path)


# Save all collected records as indented JSON, keeping non-ASCII text readable.
with open(data_json, 'w', encoding='utf-8') as f:
    json.dump(sum_lines, f, indent=2, ensure_ascii=False)

